#!/usr/bin/ruby

# Simulate hadoop well enough to do local testing.

# Directory that `fs -ls` changes into before listing.
# NOTE(review): the script referenced HADOOP_DIR without defining it anywhere
# in this file, which made `fs -ls` raise NameError. Falling back to the
# HADOOP_DIR environment variable, then the current directory, is an
# assumption -- confirm the intended location. A value that is already
# defined (e.g. by a preloaded file) is left untouched.
HADOOP_DIR = ENV['HADOOP_DIR'] || '.' unless defined?(HADOOP_DIR)

# `ls` option (already shell-quoted) that selects the timestamp format used
# for every simulated listing.
LS_TIME_STYLE = "'--time-style=+%Y-%m-%d %H:%M'"

# Prints "missing argument" to STDERR and exits with status 1 when any of the
# given values is nil; otherwise does nothing.
def require_arguments(*values)
  return unless values.any? { |value| value.nil? }

  STDERR.puts "missing argument"
  exit 1
end

# Simulates `hadoop fs <command> [args]` with local shell commands.
#
# args - the command-line words that followed `fs` (command first).
#
# Paths are interpolated into the shell command unquoted, exactly as before,
# so that glob patterns passed by the caller are still expanded by the shell.
# Exits with status 1 on a missing argument or an unrecognized command.
def run_fs_command(args)
  command, first, second = args

  case command
  when '-rmr'
    require_arguments(first)
    system("rm -rf #{first}")
  when '-rm'
    require_arguments(first)
    system("rm -f #{first}")
  when '-mkdir'
    require_arguments(first)
    system("mkdir -p #{first}")
  when '-copyFromLocal', '-put'
    # Both commands copy a local file; previously '-copyFromLocal' had an
    # empty branch and silently did nothing.
    require_arguments(first, second)
    system("cp #{first} #{second}")
  when '-cat'
    require_arguments(first)
    # A missing source is skipped quietly rather than reported.
    system("cat #{first}") if File.exist?(first)
  when '-ls'
    if first.nil?
      system("cd #{HADOOP_DIR}; ls -l #{LS_TIME_STYLE}")
    else
      system("cd #{HADOOP_DIR};ls -l #{LS_TIME_STYLE} #{first}")
    end
  when '-lsr'
    if first == '/user'
      # special test data
      system("cat sample-ls-data")
    else
      system("ls -lR #{LS_TIME_STYLE} #{first}")
    end
  else
    STDERR.puts "command not recognized #{command}"
    exit 1
  end
end

# Simulates `hadoop jar <jar> [options]` (a streaming map/reduce job) by
# piping every file in the input directory through the mapper, sorting the
# combined output, and feeding it to the reducer.
#
# args - the command-line words that followed `jar` (jar path first).
#
# Recognized options: -input, -output, -mapper, -reducer (all required), plus
# -jobconf and -file, which are accepted and ignored. The result is always
# written to a single <output>/part-00000 file.
# Exits with status 1 when a required option is missing.
def run_jar_command(args)
  # args[0] is the jar path; it is not needed for the simulation.
  remaining = args[1..-1] || []
  settings = {}

  until remaining.empty?
    option = remaining.shift
    case option
    when '-input', '-output', '-mapper', '-reducer'
      settings[option] = remaining.shift
    when '-jobconf', '-file'
      # Consume the value; it has no effect on the local simulation.
      remaining.shift
    else
      # Previously an unknown word was never consumed and the parser looped
      # forever. Skip it, but say so.
      STDERR.puts "ignoring unrecognized option #{option}"
    end
  end

  indir   = settings['-input']
  outdir  = settings['-output']
  mapper  = settings['-mapper']
  reducer = settings['-reducer']
  # Without these the pipeline below would be built from empty strings.
  require_arguments(indir, outdir, mapper, reducer)

  # Relies on a `hadoop` executable being on PATH to create the output dir.
  system("hadoop fs -mkdir #{outdir}")
  # Simulate hadoop by running mapper, then sort, then run reducer.
  cmd = "ls #{indir}|while read line;do #{mapper} < #{indir}/$line;done|LC_ALL=C sort|#{reducer} >#{outdir}/part-00000"
  #puts "running: #{cmd}"
  system cmd
end

# Dispatch on the hadoop sub-command given as the first argument.
case ARGV[0]
when 'fs'
  # Do hadoop fs commands
  run_fs_command(ARGV[1..-1])
when 'jar'
  # Do hadoop jar commands (run map reduce)
  run_jar_command(ARGV[1..-1])
else
  # Anything else is unsupported: echo the invocation and fail.
  STDERR.puts "*** hadoop #{ARGV.join(' ')}"
  exit 1
end
